1. MVC project description

Links

Author: Romain Martinez.

2. Setup

In [1]:
# Common imports
import pandas as pd
import numpy as np
import scipy.io as sio
import json

# Project paths, relative to the directory the notebook is run from
from pathlib import Path
PROJECT_PATH = Path('./')
DATA_PATH = PROJECT_PATH / 'data'

# Plotting backend: with OFFLINE set, `py` is plotly's offline module
# initialised for notebook use; otherwise `py` is the `plotly.plotly` module.
OFFLINE = True
if not OFFLINE:
    import plotly.plotly as py
else:
    import plotly.offline as py
    py.init_notebook_mode(connected=True)
import plotly.graph_objs as go
import plotly.figure_factory as ff
from plotly import tools

# Layout defaults applied to every figure built below
BASE_LAYOUT = go.Layout(hovermode='closest', font={'size': 14})

# Bar marker style: translucent fill, opaque outline in the same RGB colour
MARKER_LAYOUT = {
    'color': 'rgba(27, 158, 119, 0.6)',
    'line': {
        'color': 'rgba(27, 158, 119, 1.0)',
        'width': 2,
    },
}

3. Load the data

In [2]:
# Load the three tables stored as feather files under DATA_PATH.
df_tidy = pd.read_feather(DATA_PATH / 'df_tidy')
df_tidy_normalized = pd.read_feather(DATA_PATH / 'df_tidy_normalized')
df_wide = pd.read_feather(DATA_PATH / 'df_wide')

# Project configuration; the cells below read its 'DATASETS', 'MUSCLES' and
# 'TESTS' entries. The encoding is given explicitly so that non-ASCII labels
# decode the same way on every platform instead of depending on the locale.
with open(DATA_PATH / 'conf.json', 'r', encoding='utf-8') as conf_file:
    conf = json.load(conf_file)

4. Explore the data

4.1. Muscles by dataset

In [3]:
def plot_count_by_dataset(d, values, index, columns, **kwargs):
    """Build an annotated heatmap from a pivot table of `d`.

    Each cell holds ``len(x) / x.nunique()`` computed over the `values`
    entries of one (`index`, `columns`) pair, cast to ``int``. Pairs with no
    data are filled with 0.

    Parameters
    ----------
    d : pandas.DataFrame
        Table to pivot.
    values, index, columns
        Forwarded to ``DataFrame.pivot_table`` under the same names.
    **kwargs
        Optional ``xlabel`` / ``ylabel`` (passed as the heatmap's ``x`` and
        ``y``), ``title``, ``xtitle`` and ``ytitle``. Any key that is absent
        is passed on as ``None``.

    Returns
    -------
    The figure returned by ``ff.create_annotated_heatmap``, with BASE_LAYOUT
    and the titles, axes and margins applied to its layout.
    """
    def rows_per_unique_value(group):
        return len(group) / group.nunique()

    counts = d.pivot_table(
        values=values,
        index=index,
        columns=columns,
        aggfunc=rows_per_unique_value,
        fill_value=0)
    counts = counts.astype(int)

    heatmap = ff.create_annotated_heatmap(
        z=np.array(counts),
        x=kwargs.get('xlabel'),
        y=kwargs.get('ylabel'),
        showscale=True,
        colorscale='YlGnBu',
        colorbar={'title': 'Count', 'titleside': 'right'})

    # Shared defaults first, then the figure-specific settings on top.
    figure_layout = heatmap['layout']
    figure_layout.update(BASE_LAYOUT)
    figure_layout.update({
        'title': kwargs.get('title'),
        'xaxis': {'title': kwargs.get('xtitle'), 'side': 'bottom'},
        'yaxis': {'title': kwargs.get('ytitle'), 'autorange': 'reversed'},
        'margin': go.Margin(t=80, b=80, l=150, r=80, pad=0),
    })
    return heatmap
In [4]:
# Heatmap with one row per dataset and one column per muscle,
# aggregating the 'test' column of the tidy table.
muscle_by_dataset = plot_count_by_dataset(
    df_tidy,
    'test',
    'dataset',
    'muscle',
    title='Muscles by dataset',
    xlabel=conf['MUSCLES'],
    ylabel=conf['DATASETS'])
py.iplot(muscle_by_dataset, filename='mvc/muscles_by_dataset')

4.2. Tests by dataset

In [5]:
# Heatmap with one row per dataset and one column per test, aggregating the
# 'muscle' column of the tidy table. The figure gets its own name so that it
# does not overwrite the muscles-by-dataset figure built earlier.
test_by_dataset = plot_count_by_dataset(
    df_tidy,
    values='muscle',
    index='dataset',
    columns='test',
    ylabel=conf['DATASETS'],
    xtitle='Tests',
    title='Tests by dataset')
py.iplot(test_by_dataset, filename='mvc/tests_by_dataset')

4.3. Muscles and tests count

In [6]:
def plot_count_bar(d, column, **kwargs):
    """Build a horizontal bar chart of how often each value of `column` occurs.

    Parameters
    ----------
    d : pandas.DataFrame
        Table holding `column`.
    column : str
        Name of the column whose values are counted.
    **kwargs
        Optional ``ylabel`` (passed as the bars' ``y``), ``title``,
        ``xtitle`` and ``ytitle``. Any key that is absent is passed on as
        ``None``.

    Returns
    -------
    dict
        ``{'data': [trace], 'layout': layout}``, ready for ``py.iplot``.
    """
    # Order the counts by the counted value rather than by whatever order
    # `value_counts(sort=False)` yields. The heatmaps in this notebook pair
    # the same label lists with key-sorted pivot axes, so this keeps bar i
    # matched to label i in the same way.
    # NOTE(review): assumes `ylabel` is listed in sorted-key order - confirm.
    count = np.array(d[column].value_counts(sort=False).sort_index())
    trace = go.Bar(
        x=count, y=kwargs.get('ylabel'), marker=MARKER_LAYOUT, orientation='h')

    layout = BASE_LAYOUT.copy()
    layout.update(
        dict(
            title=kwargs.get('title'),
            xaxis=dict(
                title=kwargs.get('xtitle'), showline=True, linewidth=1.5),
            yaxis=dict(
                title=kwargs.get('ytitle'), showline=True, linewidth=1.5)))

    # Request as many y ticks as there are bars.
    layout['yaxis'].update(nticks=count.shape[0])
    layout.update(margin=go.Margin(t=80, b=80, l=150, r=80, pad=0))
    return dict(data=[trace], layout=layout)
In [7]:
# Bar chart of the number of rows of the tidy table per 'test' value
test_count_bar = plot_count_bar(
    df_tidy,
    'test',
    ytitle='Tests',
    xtitle='n',
    title='Tests count')
py.iplot(test_count_bar, filename='mvc/test_count_bar')
In [8]:
# Bar chart of the number of rows of the tidy table per 'muscle' value,
# labelled with the muscle names from the configuration
muscle_count_bar = plot_count_bar(
    df_tidy,
    'muscle',
    xtitle='n',
    title='Muscles count',
    ylabel=conf['MUSCLES'])
py.iplot(muscle_count_bar, filename='mvc/muscle_count_bar')

4.4. Which test normalizes which muscle

In [9]:
def plot_max_by_test(d, **kwargs):
    """Build an annotated heatmap of where each muscle's ``mvc`` equals 100.

    Rows of `d` whose ``mvc`` is 100 are counted per (muscle, test) pair, and
    each muscle's counts are converted to integer percentages of that
    muscle's total.

    Parameters
    ----------
    d : pandas.DataFrame
        Must provide the columns ``mvc``, ``muscle`` and ``test``.
        NOTE(review): presumably ``mvc`` is expressed in percent of the
        per-muscle maximum, so 100 marks the maximal trial - confirm.
    **kwargs
        Optional ``xlabel`` / ``ylabel`` (passed as the heatmap's ``x`` and
        ``y``), ``title``, ``xtitle`` and ``ytitle``. Any key that is absent
        is passed on as ``None``.

    Returns
    -------
    The figure returned by ``ff.create_annotated_heatmap``, with BASE_LAYOUT
    and the titles, axes and margins applied to its layout.
    """
    # `mvc` is a computed float, so compare with a tight absolute tolerance
    # instead of `==`; a value that is 100 up to rounding error still counts.
    is_maximum = np.isclose(d['mvc'], 100, rtol=0, atol=1e-9)
    maximum = d[is_maximum].pivot_table(
        values='mvc',
        index='muscle',
        columns='test',
        aggfunc='count',
        fill_value=0)

    # Round before casting: a bare `astype(int)` truncates, so a share such
    # as 0.29 * 100 == 28.999999999999996 would be displayed as 28.
    percentage = maximum.div(maximum.sum(axis=1), axis=0) * 100
    maximum = percentage.round().astype(int)

    fig = ff.create_annotated_heatmap(
        z=np.array(maximum),
        x=kwargs.get('xlabel'),
        y=kwargs.get('ylabel'),
        showscale=True,
        colorscale='YlGnBu',
        colorbar=dict(title='Percentage', titleside='right'))

    # Shared defaults first, then the figure-specific settings on top.
    fig['layout'].update(BASE_LAYOUT)
    fig['layout'].update(
        dict(
            title=kwargs.get('title'),
            xaxis=dict(title=kwargs.get('xtitle'), side='bottom'),
            yaxis=dict(title=kwargs.get('ytitle'), autorange='reversed'),
            margin=go.Margin(t=80, b=80, l=150, r=80, pad=0)))
    return fig
In [10]:
# Heatmap of the normalized table: muscles as rows, tests as columns
max_by_test = plot_max_by_test(
    df_tidy_normalized,
    title='Which test normalizes which muscle (normalized)',
    xtitle='Tests',
    ylabel=conf['MUSCLES'])

py.iplot(max_by_test, filename='mvc/max_by_test')

5. Tests selection

5.1. Plot the missing values

In [11]:
def plot_count_nan(d, **kwargs):
    """Build a bar chart of the number of null entries in each column of `d`.

    Every bar is annotated with its count. Columns with fewer than 50 nulls
    additionally get an arrow annotation reading ``test <column>``.

    Parameters
    ----------
    d : pandas.DataFrame
        Table whose columns are inspected.
    **kwargs
        Optional ``title``, ``xtitle`` and ``ytitle``. Any key that is absent
        is passed on as ``None``.

    Returns
    -------
    dict
        ``{'data': [trace], 'layout': layout}``, ready for ``py.iplot``.
    """
    nan_count = d.isnull().sum()
    bars = go.Bar(x=nan_count.index, y=nan_count, marker=MARKER_LAYOUT)

    def axis(axis_title):
        return dict(title=axis_title, showline=True, linewidth=1.5)

    layout = BASE_LAYOUT.copy()
    layout.update(
        dict(
            title=kwargs.get('title'),
            xaxis=axis(kwargs.get('xtitle')),
            yaxis=axis(kwargs.get('ytitle'))))

    notes = []
    for column, n_missing in nan_count.items():
        # The count itself, placed just above the bar.
        notes.append(
            dict(
                y=n_missing + 50,
                x=column,
                text=n_missing,
                font=dict(size=14),
                showarrow=False))
        if n_missing >= 50:
            continue
        # Columns with few nulls also get an arrow naming them.
        notes.append(
            dict(
                y=n_missing + 100,
                x=column,
                text=f'test {column}',
                font=dict(size=14),
                showarrow=True,
                arrowhead=7,
                ax=0,
                ay=-200))
    layout['annotations'] = notes
    return dict(data=[bars], layout=layout)
In [12]:
# Count the nulls in every column of the wide table except 'muscle'
# (judging by the figure title, the remaining columns are the tests).
nan_count_bar = plot_count_nan(
    df_wide.loc[:, df_wide.columns != 'muscle'],
    ytitle='NaN count',
    xtitle='Tests',
    title='NaN count for each test')
py.iplot(nan_count_bar, filename='mvc/nan_count_bar')

5.2. Display positions

In [13]:
# Derive the path from DATA_PATH, as the other data files do, so that moving
# the data directory only requires a change in the setup cell.
IMAGES_PATH = DATA_PATH / 'positions_mvc.mat'
# The .mat file stores the images under the 'positions' variable.
positions = sio.loadmat(IMAGES_PATH)['positions']
In [14]:
def rgb2gray(rgb):
    """Collapse the last axis of `rgb` into a single weighted-sum channel.

    Only the first three entries of the last axis are used, so anything
    after them (an alpha channel, for instance) is ignored. They are weighted
    by 0.299, 0.587 and 0.114 respectively and summed.
    """
    channel_weights = [0.299, 0.587, 0.114]
    colour = rgb[..., :3]
    return np.dot(colour, channel_weights)


def plot_mvc_positions(positions, id=[], **kwargs):
    """Lay out the images of the selected tests on a grid of grayscale heatmaps.

    Parameters
    ----------
    positions
        Container indexed as ``positions[0][i]`` to obtain the image of
        test ``i``.
        NOTE(review): presumably the (1, n) cell array returned by
        ``scipy.io.loadmat`` - confirm.
    id : sequence of int
        Indices of the tests to draw, in display order. The name shadows the
        builtin but is kept because callers pass it by keyword. The default
        list is never mutated.
    **kwargs
        Optional ``title`` for the whole figure.

    Returns
    -------
    The figure created by ``tools.make_subplots``, holding one heatmap per
    requested test.
    """
    n_cols = 4
    cell_size = 800 / 4  # side of one subplot, in pixels
    # Ceiling division: just enough rows to hold every image. The previous
    # `int(len(id) / 4) + 1` added an empty row whenever the number of images
    # was a multiple of 4. At least one row is kept so that an empty
    # selection still yields a valid grid.
    n_rows = max(1, -(-len(id) // n_cols))
    height = n_rows * cell_size
    width = n_cols * cell_size

    fig = tools.make_subplots(
        rows=n_rows,
        cols=n_cols,
        print_grid=False,
        horizontal_spacing=0.01,
        vertical_spacing=0.05,
        subplot_titles=[f'MVC {i}' for i in id])

    # Figure-wide settings do not depend on the loop variable, so they are
    # set once here rather than on every iteration.
    fig['layout']['title'] = kwargs.get('title')
    fig['layout'].update(height=height, width=width)

    for i, itest in enumerate(id):
        # Fill the grid row by row, left to right.
        row, col = divmod(i, n_cols)
        fig.append_trace(
            go.Heatmap(
                z=rgb2gray(positions[0][itest]),
                colorscale='Greys',
                showscale=False,
                name=f'MVC {itest}'),
            row=row + 1,
            col=col + 1)
        # Hide the ticks and flip the y axis so that image row 0 is on top.
        fig['layout'][f'xaxis{i+1}'].update(showticklabels=False, ticks='')
        fig['layout'][f'yaxis{i+1}'].update(
            autorange='reversed', showticklabels=False, ticks='')
    return fig
In [19]:
# Draw every test listed in the configuration except its last entry
positions_all = plot_mvc_positions(
    positions,
    title='Illustrations of MVIC tests',
    id=conf['TESTS'][:-1])

py.iplot(positions_all, filename='mvc/positions_all')